Assignment:

  1. Extract the meteorological data URLs. Here we want you to use the rvest package to get the URLs for the SASP forcing and SBSP_forcing meteorological datasets.
#Read URL
site_url <- 'https://snowstudies.org/archived-data/'
webpage <- read_html(site_url)

#Extract only weblinks for SASP and SBSP forcing datasets
links <- webpage %>%
  html_nodes('a') %>%
  .[grepl('forcing',.)] %>%
  html_attr('href')
  1. Download the meteorological data. Use the download_file and str_split_fixed commands to download the data and save it in your data folder. You can use a for loop or a map function.
#Grab only file name by splitting out on forward slashes
splits <- str_split_fixed(links,'/',8)

#Keep only 8th column
dataset <- splits[,8]

#Generate file list for where data goes
file_names <- paste0('data/',dataset)

#Download data in a map
map2(links[1:2],file_names[1:2],download.file)
  1. Write a custom function to read in the data and append a site column to the data.
#Given in assignment: this code grabs the variable names from the metadata pdf file
library(pdftools)
headers <- pdf_text('https://snowstudies.org/wp-content/uploads/2022/02/Serially-Complete-Metadata-text08.pdf') %>%
  readr::read_lines(.) %>%
  trimws(.) %>%
  str_split_fixed(.,'\\.',2) %>%
  .[,2] %>%
  .[1:26] %>%
  str_trim(side = "left")

#Function to read in data, re-name relevant columns, append a site column
met_reader <- function(file){
  name = str_split_fixed(file,'_',2)[,2] %>%
    gsub('_Forcing_Data.txt','',.)
  df <- read.delim(file, sep="", col.names = headers) %>%
    select(year, month, day, hour, precip = precip..kg.m.2.s.1., air_temp = air.temp..K.) %>%
    mutate(site = name)
}
  1. Use the map function to read in both meteorological files. Display a summary of your tibble.
#Read in and display both files
met_data <- map_dfr(file_names, met_reader)
summary(met_data)
##       year          month             day             hour      
##  Min.   :2003   Min.   : 1.000   Min.   : 1.00   Min.   : 0.00  
##  1st Qu.:2005   1st Qu.: 3.000   1st Qu.: 8.00   1st Qu.: 5.75  
##  Median :2007   Median : 6.000   Median :16.00   Median :11.50  
##  Mean   :2007   Mean   : 6.472   Mean   :15.76   Mean   :11.50  
##  3rd Qu.:2009   3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:17.25  
##  Max.   :2011   Max.   :12.000   Max.   :31.00   Max.   :23.00  
##      precip             air_temp         site          
##  Min.   :0.000e+00   Min.   :242.1   Length:138336     
##  1st Qu.:0.000e+00   1st Qu.:265.8   Class :character  
##  Median :0.000e+00   Median :272.6   Mode  :character  
##  Mean   :3.838e-05   Mean   :272.6                     
##  3rd Qu.:0.000e+00   3rd Qu.:279.7                     
##  Max.   :6.111e-03   Max.   :295.8
  1. Make a line plot of mean temp by year by site (using the air temp [K] variable). Is there anything suspicious in the plot? Adjust your filtering if needed.
#Calculate mean temp by year and site
annual_temp <- met_data %>%
  group_by(year,site) %>%
  summarize(mean_temp = mean(`air_temp`))

#Plot mean temp by year and site
ggplot(annual_temp, aes(x=year,y=mean_temp,color=site)) +
  geom_line() +
  theme_few() +
  scale_color_few() + 
  theme(legend.position = c(0.85,0.2)) +
  xlab("Year") + ylab("Mean air temperature (K)")

The mean annual temperature for 2003 is much lower than other years. Checking the data shows incomplete annual data for 2003 (only Nov and Dec) and 2011 (missing Oct, Nov, and Dec). Those years will be filtered out below.

#Calculate mean temp by year and site (excluding 2003,2011)
annual_temp <- met_data %>%
  filter(!(year %in% c(2003,2011))) %>%
  group_by(year,site) %>%
  summarize(mean_temp = mean(`air_temp`))

#Plot mean temp by year and site (excluding 2003,2011)
ggplot(annual_temp, aes(x=year,y=mean_temp,color=site)) +
  geom_line() +
  theme_few() +
  scale_color_few() + 
  theme(legend.position = c(0.85,0.2)) +
  xlab("Year") + ylab("Mean air temperature (K)")

  1. Write a function that makes line plots of monthly average temperature at each site for a given year. Use a for loop to make these plots for 2005 to 2010. Are monthly average temperatures at the Senator Beck Study Plot ever warmer than the Snow Angel Study Plot? Hint: https://ggplot2.tidyverse.org/reference/print.ggplot.html

No, monthly average air temperatures at SBSP are at no point warmer than SASP between 2005 and 2010.

#Function to make line plots of monthly mean temps (by site) for a given year
monthly_temp_f <- function(yr,df){
  plot_df <- df %>%
    filter(year == yr) %>%
    group_by(month,site) %>%
    summarize(mean_temp = mean(air_temp))
  
  a <- ggplot(plot_df, aes(x=month,y=mean_temp,color=site)) +
    geom_line() +
    theme_few() +
    scale_color_few() +
    theme(legend.position = c(0.9,0.85)) +
    xlab("Month") + ylab("Mean air temperature (K)") +
    ggtitle(as.character(yr))
  
  print(a)
}

#Loop from 2005-2010
for(yr in 2005:2010){
  monthly_temp_f(yr, met_data)
}

Bonus #1: Make a plot of average daily precipitation by day of year (averaged across all available years).

#Create "day of year" column
met_data$date <- as.Date(with(met_data, paste(year, month, day, sep="/")), "%Y/%m/%d")
met_data$doy <- yday(met_data$date)

#Calculate mean daily precipitation by day of year
daily_mean_precip <- met_data %>%
  group_by(doy,site) %>%
  summarize(mean_precip = mean(precip)) %>%
  mutate(daily_precip = mean_precip * 24)

#Plot mean daily precipitation by day of year
ggplot(daily_mean_precip, aes(x=doy,y=daily_precip,color=site)) +
  geom_line() +
  theme_few() +
  scale_color_few() +
  theme(legend.position = "none") +
  xlab("Day of year") + ylab("Mean daily precipitation (kg/m2/s)")

Bonus #2: Use a function and for loop to create yearly plots of precipitation by day of year.

#Function to plot annual precipitation by day of year
daily_precip_f <- function(yr,df){
  plot_df <- df %>%
    filter(year == yr) %>%
    group_by(doy,site) %>%
    summarize(mean_precip = mean(precip)) %>%
    mutate(daily_precip = mean_precip * 24)
  
  a <- ggplot(plot_df, aes(x=doy,y=daily_precip,color=site)) +
    geom_line() +
    theme_few() +
    scale_color_few() +
    theme(legend.position = "none") +
    xlab("Day of year") + ylab("Mean daily precipitation (kg/m2/s)") +
    ggtitle(as.character(yr))
  
  print(a)
}

#Loop from 2004-2010
for(yr in 2004:2010){
  daily_precip_f(yr, met_data)
}